import pandas as pd
import numpy as np
from pandas_profiling import ProfileReport
# Load the raw dataset from a CSV file. Despite its name, data_url holds a
# relative filesystem path, not a remote URL.
data_url = './data/ab_us_2020.csv'
df = pd.read_csv(data_url)
print(f"Dataset: {df.shape}")
# NOTE(review): the two statements below are bare expressions. Their values
# are only displayed by an interactive front end (e.g. as the last statement
# of a notebook cell); when this file runs as a plain script the results are
# discarded.
df.head()
ProfileReport(df)
# Drop the columns treated as irrelevant features. DataFrame.drop returns a
# new frame (inplace defaults to False), so no explicit copy() is needed
# before calling it.
df = df.drop(
    columns=[
        'id', 'name', 'host_id', 'host_name',
        'neighbourhood_group', 'reviews_per_month', 'last_review',
    ]
)

# Feature and target matrices: y is the target column, X is every other
# remaining column.
target = 'price'
X = df.drop(columns=target)
y = df[target]
print(f"Features: {X.shape}")
# Bare expression: the preview is only displayed in an interactive session
# (e.g. as the last statement of a notebook cell); a plain script discards it.
X.head()